import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize']=12,8
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import MinMaxScaler,StandardScaler,MaxAbsScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import FunctionTransformer
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import itertools
# Load the combined postings workbook and discard rows that are exact duplicates.
data = pd.read_excel('Combine data/sectorwise.xlsx').drop_duplicates()
data.head()

# Work on the four columns used below; dropping the other columns can expose
# new duplicates, so de-duplicate once more.
df1 = data[['Job_Title', 'Company_Name', 'Posted_Date', 'Sector']].drop_duplicates()
df1['Posted_Date'] = pd.to_datetime(df1['Posted_Date'])

# Number of postings per posting date, across all sectors.
df = df1.groupby('Posted_Date')['Posted_Date'].count()
df.describe()

# Number of postings per sector label, before the labels are normalised.
sector = df1.groupby('Sector')['Sector'].count()
sector.sort_values(ascending=False)
# Canonical sector label -> spelling / casing / whitespace variants to fold into it.
# The variants are replaced by assignment rather than with
# `df1['Sector'].replace(..., inplace=True)`: that form mutates a temporary
# Series and does not update `df1` when pandas Copy-on-Write is active.
_SECTOR_VARIANTS = {
    'Information Technology': ['Information Technology \n'],
    # NOTE(review): the empty string is folded into this sector as well, exactly
    # as before -- confirm that blank sectors really belong here.
    'Business Consulting and Management': ['Business, consulting and management','business, consulting and management',''],
    'Marketing, Advertising and PR': ['Marketing, Advertising, and PR','marketing, Advertising, and PR','Marketing, Advertising and PR','Marketing Advertising and PR'],
    'Accounting, Finance and Banking': ['Accountancy, banking, and finance jobs','Finance','Accounting , Finance and Banking'],
    'Public Service and Administration': ['Public service and Administration','Public services and Administration','public services and Administration'],
    'Science and Pharmaceuticals': ['Science and Pharmaceuticals \n'],
    'Leisure, Sports and Tourism': ['Leisure, Sports, and Tourism'],
    'Transport and Logistics': ['Transport and logistics','Trasport and Logistics'],
    'Media and Internet': ['Media and the Internet'],
    'Teacher Training and Education': ['Teacher, Training and Education'],
    'Social Care': ['Social care'],
    'Energy and Utilities': ['Energy and utilities'],
    'Law Enforcement and Security': ['Law enforcement and security'],
}
# Applied in the same order as the original one-per-line calls.
for _canonical, _variants in _SECTOR_VARIANTS.items():
    df1['Sector'] = df1['Sector'].replace(_variants, _canonical)

# Recount postings per sector now that the labels are consistent.
sector = df1.groupby('Sector')['Sector'].count()
sector.sort_values(ascending=False)
# Pie chart of the ten sectors with the most postings.
fig1, ax1 = plt.subplots(figsize=(25, 18))
sns.set(font_scale=3)
# Sort once so the wedge sizes and their labels come from the same ordering.
top_ten = sector.sort_values(ascending=False)[0:10]
ax1.pie(top_ten, labels=top_ten.index, autopct='%1.0f%%', startangle=90, textprops={'fontsize': 25})
ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
ax1.set_title('Top 10 Sectors\n', fontweight='bold')
# Adjust the layout before rendering; calling it after plt.show() cannot
# change the figure that was already displayed.
plt.tight_layout()
plt.show()
# Horizontal bar chart of postings per sector, largest sector first.
# `sec` (sectors ordered by posting count) is reused by the sections below.
sec = sector.sort_values(ascending=False)
fig_dims = (25, 15)
sns.set(font_scale=3)
fig, ax = plt.subplots(figsize=fig_dims)
sns.barplot(x=sec.values, y=sec.index, ax=ax)
# `ax` is the axes created just above, so label it directly.
ax.set_ylabel('Sector', fontweight='bold')
ax.set_title('Sector-wise Job Postings\n', fontweight='bold')
# One stacked subplot per sector showing its daily posting counts.
fig, ax1 = plt.subplots(len(sec.index), figsize=(15, 50))
print('Sector-wise Job Postings')
print('__________________________________________________________________________________________________________________')
for plot, i in enumerate(sec.index):
    # Postings per date for this sector only.
    it = df1[df1['Sector'] == i]
    d_it = it.groupby('Posted_Date')['Posted_Date'].count()
    sns.set(font_scale=1)
    panel = ax1[plot]
    panel.plot(d_it, label=i)
    panel.legend()
from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries):
    """Plot rolling statistics for ``timeseries`` and print its ADF test summary.

    The series is drawn together with its 10-observation rolling mean and
    rolling standard deviation.  An augmented Dickey-Fuller test (lag chosen
    by AIC) is then run, and the test statistic, p-value, lags used,
    observation count and critical values are printed as one Series.

    Parameters
    ----------
    timeseries : pandas.Series
        Series to inspect.

    Returns
    -------
    None
        Output is a plot plus printed text.
    """
    # Rolling statistics over a 10-observation window.
    rolling_view = timeseries.rolling(window=10)
    rolmean = rolling_view.mean()
    rolstd = rolling_view.std()

    # Original series and both rolling curves on the current axes.
    for curve, colour, caption in (
        (timeseries, 'blue', 'Original'),
        (rolmean, 'red', 'Rolling Mean'),
        (rolstd, 'black', 'Rolling Std'),
    ):
        plt.plot(curve, color=colour, label=caption)
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)

    # Dickey-Fuller test; the first four entries of the result are the headline figures.
    print ('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    headline_labels = ['Test Statistic','p-value','#Lags Used','Number of Observations Used']
    dfoutput = pd.Series(dftest[0:4], index=headline_labels)
    # Entry 4 maps significance level -> critical value.
    for level, critical in dftest[4].items():
        dfoutput['Critical Value (%s)'%level] = critical
    print(dfoutput)
# Stationarity screening.
#
# test_stationarity is run on the daily posting counts of the 15 largest
# sectors: first on the raw counts, then once per candidate transformation.
# Output order is unchanged: all sectors for one transformation, then all
# sectors for the next.

def _sector_daily_counts(sector_name):
    """Return the number of postings per Posted_Date for one sector."""
    it = df1[df1['Sector'] == sector_name]
    return it.groupby('Posted_Date')['Posted_Date'].count()


def _fit_and_transform(series, scaler):
    """Fit ``scaler`` on ``series`` and return the transformed values as a Series.

    The values are reshaped to a single column before fitting and flattened
    afterwards; the result keeps the index of ``series``.
    """
    column = series.values.reshape(len(series), 1)
    transformed = scaler.fit(column).transform(column)
    return pd.Series(transformed.flatten(), index=series.index)


def _difference(series, periods):
    """Return ``series`` differenced over ``periods`` rows, without the leading NaNs."""
    return series.diff(periods=periods).dropna()


# One callable per screening pass, in the order the passes are reported.
# Each scaler-based entry builds a fresh transformer per call, so nothing is
# shared between sectors.
_STATIONARITY_CANDIDATES = [
    lambda s: s,  # raw counts
    np.log,
    lambda s: _fit_and_transform(s, MinMaxScaler(feature_range=(0,1))),
    lambda s: _fit_and_transform(s, StandardScaler()),
    np.sqrt,
    np.cbrt,
    lambda s: _difference(s, 1),
    lambda s: _difference(s, 2),
    lambda s: _difference(s, 3),
    lambda s: _fit_and_transform(s, PowerTransformer(method='box-cox')),
    lambda s: _fit_and_transform(s, PowerTransformer(method='yeo-johnson')),
    lambda s: _fit_and_transform(s, MaxAbsScaler()),
    lambda s: _fit_and_transform(s, RobustScaler()),
    lambda s: _fit_and_transform(s, QuantileTransformer(output_distribution='uniform')),
    lambda s: _fit_and_transform(s, QuantileTransformer(output_distribution='normal')),
    lambda s: _fit_and_transform(s, FunctionTransformer(np.log1p)),
]

# The per-sector counts do not depend on the transformation, so build them once.
_screened_sectors = sec.index[0:15]
_screened_counts = {name: _sector_daily_counts(name) for name in _screened_sectors}

for _candidate in _STATIONARITY_CANDIDATES:
    for _sector_name in _screened_sectors:
        print(_sector_name)
        test_stationarity(_candidate(_screened_counts[_sector_name]))
        print('------------------------------------------------------')
# Search grid for the ARIMA order: every (p, d, q) with each term in 0..4,
# ordered with q varying fastest.
p = d = q = range(0, 5)
pdq = [(p_term, d_term, q_term) for p_term in p for d_term in d for q_term in q]
pdq[0:10]
def arima_model(ts, holdout=25):
    """Pick an ARIMA order for ``ts`` by AIC and report a holdout RMSE.

    Every order in the module-level ``pdq`` grid is fitted on the full
    series; orders whose fit raises are skipped.  The order with the lowest
    AIC is printed together with that AIC.  The selected order is then
    refitted on all but the last ``holdout`` observations, and its
    ``holdout``-step forecast is scored against the withheld observations.

    Parameters
    ----------
    ts : pandas.Series
        Series to model.
    holdout : int, default 25
        Number of trailing observations withheld for the RMSE check.

    Returns
    -------
    None
        All results are printed.
    """
    separator = "_____________________________________________________________________"

    # --- order selection on the full series ---------------------------------
    scored = []
    for param in pdq:
        try:
            candidate_fit = ARIMA(ts, order=param).fit()
        except Exception:
            # Orders that fail to fit are skipped.  Catching Exception (not a
            # bare except) keeps Ctrl-C able to stop the long grid search.
            continue
        # Orders with a NaN AIC cannot be ranked.
        if not np.isnan(candidate_fit.aic):
            scored.append((param, candidate_fit.aic))

    if not scored:
        print("No ARIMA order in the search grid could be fitted")
        print(separator)
        return

    best_order, best_aic = min(scored, key=lambda entry: entry[1])
    print("Best values of p, d & q for ARIMA model are p = %d, d = %d & q = %d"%(best_order[0],best_order[1],best_order[2]))
    print("AIC =",best_aic)

    # --- holdout evaluation of the selected order ----------------------------
    if len(ts) <= holdout:
        print("Series has %d observations; need more than %d for the holdout check"%(len(ts),holdout))
        print(separator)
        return

    t = len(ts) - holdout
    demand_train = ts[0:t]
    demand_test = ts[t:]
    # Refit the selected order on the training part only.  Previously the
    # forecast came from whichever order happened to be fitted last in the
    # grid loop, and that model had already seen the test observations.
    try:
        holdout_fit = ARIMA(demand_train, order=best_order).fit()
    except Exception as err:
        print("Could not refit the selected order on the training split:", err)
        print(separator)
        return

    # With the legacy statsmodels ARIMA imported by this file, forecast()
    # returns a tuple whose first element holds the point forecasts.
    demand_forecast = holdout_fit.forecast(steps=holdout)[0]
    jobs_error = mean_squared_error(demand_test, demand_forecast)
    print("RSME = ",np.sqrt(jobs_error))
    print(separator)
# ARIMA order search.
#
# arima_model is run on the daily posting counts of the 15 largest sectors,
# once per candidate transformation.  Output order is unchanged: all sectors
# for one transformation, then all sectors for the next.

def _sector_daily_counts(sector_name):
    """Return the number of postings per Posted_Date for one sector."""
    it = df1[df1['Sector'] == sector_name]
    return it.groupby('Posted_Date')['Posted_Date'].count()


def _fit_and_transform(series, scaler):
    """Fit ``scaler`` on ``series`` and return the transformed values as a Series.

    The values are reshaped to a single column before fitting and flattened
    afterwards; the result keeps the index of ``series``.
    """
    column = series.values.reshape(len(series), 1)
    transformed = scaler.fit(column).transform(column)
    return pd.Series(transformed.flatten(), index=series.index)


def _difference(series, periods):
    """Return ``series`` differenced over ``periods`` rows, without the leading NaNs."""
    return series.diff(periods=periods).dropna()


# One callable per modelling pass, in the order the passes are reported.
# Each scaler-based entry builds a fresh transformer per call, so nothing is
# shared between sectors.
_ARIMA_CANDIDATES = [
    np.log,
    lambda s: _fit_and_transform(s, MinMaxScaler(feature_range=(0,1))),
    lambda s: _fit_and_transform(s, StandardScaler()),
    np.sqrt,
    np.cbrt,
    lambda s: _difference(s, 1),
    lambda s: _difference(s, 2),
    lambda s: _fit_and_transform(s, PowerTransformer(method='box-cox')),
    lambda s: _fit_and_transform(s, PowerTransformer(method='yeo-johnson')),
    lambda s: _fit_and_transform(s, MaxAbsScaler()),
    lambda s: _fit_and_transform(s, RobustScaler()),
    lambda s: _fit_and_transform(s, QuantileTransformer(output_distribution='uniform')),
    lambda s: _fit_and_transform(s, QuantileTransformer(output_distribution='normal')),
    lambda s: _fit_and_transform(s, FunctionTransformer(np.log1p)),
]

# The per-sector counts do not depend on the transformation, so build them once.
_modelled_sectors = sec.index[0:15]
_modelled_counts = {name: _sector_daily_counts(name) for name in _modelled_sectors}

for _candidate in _ARIMA_CANDIDATES:
    for _sector_name in _modelled_sectors:
        print(_sector_name)
        arima_model(_candidate(_modelled_counts[_sector_name]))
# For each of the 15 largest sectors, list up to five transformations from its
# results workbook that pass the stationarity filter, lowest p-value first.
for i in sec.index[0:15]:
    dts = pd.read_excel('DataTransformation_Sector/'+i+'.xlsx')
    print(i,'\n')
    # Strip non-breaking spaces from the workbook cells.
    dts.replace(u'\xa0',u'', regex=True, inplace=True)
    # Keep rows with p-value <= 0.05 whose test statistic lies below both the
    # 1% and the 5% critical value.
    significant = dts['P-VALUE'] <= 0.05
    below_1pct = dts['CRITICAL VALUE(1%)'] > dts['TEST STATISTIC']
    below_5pct = dts['CRITICAL VALUE(5%)'] > dts['TEST STATISTIC']
    dts = dts[significant & below_1pct & below_5pct].sort_values('P-VALUE')
    dtsl = dts['TRANSFORMATION'].iloc[0:5].values
    # A separate loop variable keeps the sector name in `i` intact.
    for rank, transformation in enumerate(dtsl, start=1):
        print(str(rank)+'. '+transformation)
    # The separator previously ended in a literal "n" (missing backslash).
    print("___________________________________________________________________________________________\n")
# For each of the 15 largest sectors, print the MaxAbsScaler row of its results
# workbook if that row passes the stationarity filter.
for sector_name in sec.index[0:15]:
    dts = pd.read_excel('DataTransformation_Sector/' + sector_name + '.xlsx')
    print(sector_name)
    print("------------------------------------\n")
    # Strip non-breaking spaces from the workbook cells.
    dts.replace(u'\xa0', u'', regex=True, inplace=True)
    # Filter: p-value <= 0.05 and test statistic below both critical values.
    significant = dts['P-VALUE'] <= 0.05
    below_1pct = dts['CRITICAL VALUE(1%)'] > dts['TEST STATISTIC']
    below_5pct = dts['CRITICAL VALUE(5%)'] > dts['TEST STATISTIC']
    is_maxabs = dts['TRANSFORMATION'] == 'MaxAbsScaler Transformation'
    dtsl = dts[significant & below_1pct & below_5pct & is_maxabs]
    print(dtsl.values)
    print("___________________________________________________________________________________________\n")
# For each of the 15 largest sectors, print the cube-root row of its results
# workbook if that row passes the (looser) stationarity filter.
for sector_name in sec.index[0:15]:
    dts = pd.read_excel('DataTransformation_Sector/' + sector_name + '.xlsx')
    print(sector_name)
    print("------------------------------------\n")
    # Strip non-breaking spaces from the workbook cells.
    dts.replace(u'\xa0', u'', regex=True, inplace=True)
    # Filter: p-value <= 0.05 and test statistic below at least one of the
    # 1% / 5% critical values.
    significant = dts['P-VALUE'] <= 0.05
    below_1pct = dts['CRITICAL VALUE(1%)'] > dts['TEST STATISTIC']
    below_5pct = dts['CRITICAL VALUE(5%)'] > dts['TEST STATISTIC']
    # The label spelling below must match the workbook text exactly.
    is_cube_root = dts['TRANSFORMATION'] == 'Cube Root Transfromation'
    dtsl = dts[significant & (below_1pct | below_5pct) & is_cube_root]
    print(dtsl.values)
    print("___________________________________________________________________________________________\n")
# Sectors modelled on MaxAbsScaler-transformed counts, each paired with the
# ARIMA (p, d, q) order used for it.
_abs_plan = [
    ('Sales', (3,0,2)),
    ('Information Technology', (1,0,0)),
    ('Business Consulting and Management', (1,0,0)),
    ('Retail', (1,0,1)),
    ('Marketing, Advertising and PR', (1,0,0)),
    ('Teacher Training and Education', (0,0,0)),
    ('Accounting, Finance and Banking', (1,0,0)),
    ('Engineering and Manufacturing', (0,0,0)),
    ('Healthcare', (0,0,0)),
    ('Media and Internet', (0,0,0)),
    ('Recruitment and HR', (0,0,0)),
    ('Transport and Logistics', (0,0,1)),
    ('Hospitality and Event Management', (0,0,0)),
]
# Sectors modelled on cube-root-transformed counts, with their orders.
_cbrt_plan = [
    ('Property and Consultation', (0,0,1)),
    ('Public Service and Administration', (0,0,0)),
]

# Parallel lists: position k in a name list matches position k in its order list.
sect_abs = [name for name, _ in _abs_plan]
sect_cbrt = [name for name, _ in _cbrt_plan]
sect_abs_pdq = [order for _, order in _abs_plan]
sect_cbrt_pdq = [order for _, order in _cbrt_plan]
# Per-sector forecasts.
#
# Every sector follows the same recipe: transform the daily counts, fit an
# ARIMA model with the order chosen for that sector, forecast over a fixed
# date range, undo the transformation and plot history plus forecast.  The
# recipe lives in _forecast_sector; the plan below lists the sectors in the
# order they are forecast.

def _maxabs_scaled(counts):
    """Scale ``counts`` with a fresh MaxAbsScaler.

    Returns the scaled Series and a function that maps a 1-D forecast array
    back to the original scale using the same fitted scaler.
    """
    scaler = MaxAbsScaler()
    column = counts.values.reshape(len(counts), 1)
    scaled = pd.Series(scaler.fit(column).transform(column).flatten(), index=counts.index)

    def restore(forecast):
        return scaler.inverse_transform(forecast.reshape(len(forecast), 1)).flatten()

    return scaled, restore


def _cube_rooted(counts):
    """Cube-root ``counts``; the paired inverse cubes a forecast array."""
    return np.cbrt(counts), lambda forecast: np.power(forecast, 3).flatten()


def _forecast_sector(sector_name, order, prepare, start, end='08/01/2020',
                     show_aic=False, plot_end=190):
    """Fit ARIMA(``order``) to one sector and plot and return its forecast.

    Parameters
    ----------
    sector_name : str
        Value of the ``Sector`` column to model.
    order : tuple
        ARIMA (p, d, q) order.
    prepare : callable
        Takes the daily counts, returns ``(transformed_series, inverse)``.
    start, end : str
        First and last forecast date, mm/dd/yyyy.  One forecast step is
        produced per date in this range.
        NOTE(review): assumes the observed series ends the day before
        ``start`` -- confirm against the data.
    show_aic : bool
        Print the fitted model's AIC.
    plot_end : int
        End position passed to ``plot_predict``.

    Returns
    -------
    pandas.Series
        Forecast on the original scale, indexed by forecast date.
    """
    it = df1[df1['Sector'] == sector_name]
    d_it = it.groupby('Posted_Date')['Posted_Date'].count()
    print(sector_name + " Prediction")

    series, restore = prepare(d_it)
    # Deriving the step count from the date range keeps the forecast length
    # and its index in agreement.
    horizon = pd.date_range(start=start, end=end)

    demand_model_fit = ARIMA(series, order=order).fit()
    # With the legacy statsmodels ARIMA imported by this file, forecast()
    # returns a tuple whose first element holds the point forecasts.
    demand_forecast = demand_model_fit.forecast(steps=len(horizon))[0]

    fig, ax = plt.subplots()
    demand_model_fit.plot_predict(1, plot_end, ax=ax)
    if show_aic:
        print("AIC = " + str(demand_model_fit.aic))

    demand_prediction = pd.Series(restore(demand_forecast), index=horizon)
    d_it.plot(figsize=(15, 10))
    demand_prediction.plot(figsize=(10, 6))
    return demand_prediction


# (sector, order, transformation, first forecast date, print AIC?) in run order.
# NOTE(review): sect_cbrt[0] has no forecast in this section -- confirm whether
# that is intentional.
_FORECAST_PLAN = (
    [(name, order, _maxabs_scaled, '04/12/2020', False)
     for name, order in zip(sect_abs[:12], sect_abs_pdq[:12])]
    + [(sect_cbrt[1], sect_cbrt_pdq[1], _cube_rooted, '04/10/2020', True)]
    + [(sect_abs[12], sect_abs_pdq[12], _maxabs_scaled, '04/12/2020', False)]
)

# Forecasts keyed by sector; `demand_prediction` ends up holding the last one.
demand_predictions = {}
for _name, _order, _prepare, _start, _show_aic in _FORECAST_PLAN:
    demand_prediction = _forecast_sector(_name, _order, _prepare, _start, show_aic=_show_aic)
    demand_predictions[_name] = demand_prediction

# Bare expression left over from the notebook; it has no effect when run as a script.
14